In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
pd.__version__  # need pandas >= 0.14.0 for the MultiIndex slicing used below
Out[12]:
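The 0.14.0 requirement is for the label-based MultiIndex slicing (.loc(axis=0)[...]) applied to the statistics frames below. A minimal sketch of the idiom on a throwaway frame (the toy index and values are illustrative only, not from the runs in this notebook):

# toy (K, M) MultiIndex, just to demonstrate the slicing used later
idx = pd.MultiIndex.from_product([[1, 2, 3], [10, 20]], names=["K", "M"])
toy = pd.DataFrame({"rms_error": np.arange(6.0)}, index=idx)
toy.loc(axis=0)[2:3, :]            # rows with K in 2..3, any M
toy.loc[pd.IndexSlice[2:3, :], :]  # equivalent, via pd.IndexSlice

Both forms need a lexsorted index; if slicing complains about an unsorted index, call sort_index() first.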
In [13]:
# all variables, K = 10..200, M = 10..200, vertical stacking only;
# the compression ratio recorded in these files is wrong (recomputed below)
# overall stats: one row per (K, M), one column per statistic
ol = pd.read_table("overall_statistics_klarge.txt").set_index(["K", "M", "STATISTIC"])["VALUE"].unstack()
# per-variable stats: (K, M) rows, (VARIABLE, STATISTIC) column MultiIndex
vl = pd.read_table("variable_statistics_klarge.txt").set_index(["K", "M", "STATISTIC", "VARIABLE"])["VALUE"].unstack().unstack()
In [14]:
# all variables, K = 1..10, M = 10..200, vertical stacking only;
# the compression ratio recorded in these files is also wrong (recomputed below)
os = pd.read_table("overall_statistics_ksmall.txt").set_index(["K", "M", "STATISTIC"])["VALUE"].unstack()
vs = pd.read_table("variable_statistics_ksmall.txt").set_index(["K", "M", "STATISTIC", "VARIABLE"])["VALUE"].unstack().unstack()
In [15]:
# 3D variables only, K = 6..15, M = 160..250, vertical stacking;
# the compression ratio recorded in these files is correct
o3d = pd.read_table("overall_statistics_3d.txt").set_index(["K", "M", "STATISTIC"])["VALUE"].unstack()
v3d = pd.read_table("variable_statistics_3d.txt").set_index(["K", "M", "STATISTIC", "VARIABLE"])["VALUE"].unstack().unstack()
In [16]:
N_c = 3008   # for all variables, vertical stacking
N_d = 48602  # for all variables, vertical stacking
original_size = float(N_c * N_d)  # float so the ratios below use true division

def compressed_size(K, M):
    return N_d + N_c * K + N_d * M + N_c * K * M

# recompute the compression ratio (the values recorded in the klarge/ksmall files are wrong)
for df in (ol, os):
    K = df.index.get_level_values("K").values
    M = df.index.get_level_values("M").values
    df["compression_ratio_fixed"] = compressed_size(K, M) / original_size
In [17]:
# K large run: mean rms error across variables vs corrected compression ratio, one line per K
grouped_data = vl.loc(axis=0)[10:100, :].mean(axis=1, level="STATISTIC").join(ol).reset_index().groupby("K")
for key, grp in grouped_data:
    plt.plot(grp["compression_ratio_fixed"], grp["rms_error"], label="K = " + str(key))
plt.legend()
plt.xlabel("compression ratio")
plt.ylabel("mean rms error")
plt.title("error vs compression ratio, by K")
Out[17]:
In [18]:
# K small run
grouped_data = vs.mean(axis=1, level="STATISTIC").join(os).reset_index().groupby("K")
for key, grp in grouped_data:
    plt.plot(grp["compression_ratio_fixed"], grp["rms_error"], label="K = " + str(key))
plt.legend()
plt.xlabel("compression ratio")
plt.ylabel("mean rms error")
plt.title("error vs compression ratio, by K")
Out[18]:
In [19]:
# K small run, zoomed
grouped_data = vs.loc(axis=0)[5:10, :].mean(axis=1, level="STATISTIC").join(os).reset_index().groupby("K")
for key, grp in grouped_data:
    plt.plot(grp["compression_ratio_fixed"], grp["rms_error"], label="K = " + str(key))
plt.legend()
plt.xlabel("compression ratio")
plt.ylabel("mean rms error")
plt.title("error vs compression ratio, by K (zoomed)")
plt.xlim((0.05, 0.07))
plt.ylim((0.002, 0.0035))
# alternate zoom:
# plt.xlim((0.08, 0.1))
# plt.ylim((0.0013, 0.002))
Out[19]:
In [20]:
# 3D-only run, K = 8..14; here the compression ratio from the file is correct, so use it as-is
grouped_data = v3d.loc(axis=0)[8:14, :].mean(axis=1, level="STATISTIC").join(o3d).reset_index().groupby("K")
for key, grp in grouped_data:
    plt.plot(grp["compression_ratio"], grp["rms_error"], label="K = " + str(key))
plt.legend()
plt.xlabel("compression ratio")
plt.ylabel("mean rms error")
plt.title("error vs compression ratio, by K (3D only)")
plt.xlim((0.11,0.13))
plt.ylim((0.001,0.00115))
Out[20]:
In [21]:
# K small run: error vs time to solve
grouped_data = vs.loc(axis=0)[5:10, :].mean(axis=1, level="STATISTIC").join(os).reset_index().groupby("K")
for key, grp in grouped_data:
    plt.plot(grp["time_solve"], grp["rms_error"], label="K = " + str(key))
plt.legend()
plt.xlabel("time to solve [s]")
plt.ylabel("mean rms error")
plt.title("error vs time to solve, by K")
plt.xlim((300,600))
plt.ylim((0.002,0.0035))
Out[21]:
In [22]:
# 3D-only run: error vs time to solve
grouped_data = v3d.loc(axis=0)[8:14, :].mean(axis=1, level="STATISTIC").join(o3d).reset_index().groupby("K")
for key, grp in grouped_data:
    plt.plot(grp["time_solve"], grp["rms_error"], label="K = " + str(key))
plt.legend()
plt.xlabel("time to solve [s]")
plt.ylabel("mean rms error")
plt.title("error vs time to solve, by K (3D only)")
#plt.xlim((800,1100))
#plt.ylim((0.001,0.00115))
Out[22]:
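The six plotting cells above repeat the same loop; a small helper would collapse them. A sketch under the same column names (the function name and signature are mine, not part of the original runs):

# Hypothetical helper collapsing the repeated plotting cells above.
def plot_by_k(var_stats, overall, k_slice, x, xlabel, title, xlim=None, ylim=None):
    # average each statistic across variables, attach the overall stats, then one line per K
    data = (var_stats.loc(axis=0)[k_slice, :]
                     .mean(axis=1, level="STATISTIC")
                     .join(overall)
                     .reset_index())
    for key, grp in data.groupby("K"):
        plt.plot(grp[x], grp["rms_error"], label="K = " + str(key))
    plt.legend()
    plt.xlabel(xlabel)
    plt.ylabel("mean rms error")
    plt.title(title)
    if xlim is not None:
        plt.xlim(xlim)
    if ylim is not None:
        plt.ylim(ylim)

# e.g. the first plot above becomes:
# plot_by_k(vl, ol, slice(10, 100), "compression_ratio_fixed",
#           "compression ratio", "error vs compression ratio, by K")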